How many runs per farm are needed?

1 Introduction

Originally, each farm was run with 5000 Monte-Carlo simulation runs. This takes quite a long time (4-5 minutes per farm and feed). But is this necessary?

2 Set up data for MC testing

Only use a small but representative sample of farms (n = 50) across the mean temperature spectrum.

Sys.setenv(TAR_PROJECT = "project_farmruns")

# mani <- tar_manifest()

feed_params <- tar_read(reference_feed)
farm_static_data <- tar_read(farm_static_data_chunked)
farm_static_data <- split(farm_static_data, farm_static_data$farm_ID)
farm_ts_data <- tar_read(farm_ts_data_chunked)
all_params <- tar_read(all_params)

ts_means <- farm_ts_data %>% 
  imap(~ mutate(.x, farm_ID = as.integer(str_split_i(.y, "_", 6)))) %>% 
  lapply(function(df){
    df %>% 
      group_by(farm_ID) %>% 
      reframe(mean = mean(temp_c))
  }) %>% 
    bind_rows()

# Get a small sample
samp <- ts_means %>%
  mutate(temp_bin = ntile(mean, 15)) %>%
  group_by(temp_bin) %>%
  slice_sample(n = 2) %>%
  ungroup() %>%
  pull(farm_ID)

farm_static_data <- farm_static_data[samp]
farm_ts_data <- farm_ts_data[samp]
# Set up parallel processing - make sure to comment this out when rendering with quarto
# plan(multisession, workers = parallelly::availableCores()-2)

# This is uniform
farm_run_singular <- function() {
  furrr::future_map2_dfr(farm_ts_data, farm_static_data, function(ts, static) {
    uni_farm_growth(
      pop_params = all_params,
      species_params = all_params,
      water_temp = ts$temp_c,
      feed_params = feed_params,
      times = c(t_start = static$t_start, t_end = static$t_end, dt = 1),
      N_pop = ts$Npop
    ) %>% 
      furrr::future_map_dfr(function(mat) {
        mat %>% 
          as.data.frame() %>% 
          rename(t = V1, mean = V2, sd = 0) %>% 
          mutate(
            farm_ID = as.integer(static$farm_ID),
            t = as.integer(t)
          )
        }, 
        .id = "measure",
        .options = furrr_options(seed = T)
      ) %>% 
      mutate(measure = as.factor(measure))
  },
  .options = furrr_options(seed = T),
  .progress = T
  )
}

# This has the MC runs in it
farm_run_MC = function(n) {
  furrr::future_map2_dfr(farm_ts_data, farm_static_data, function(ts, static) {
    fg <- farm_growth(
      pop_params = all_params,
      species_params = all_params,
      water_temp = ts$temp_c,
      feed_params = feed_params,
      times = c(t_start = static$t_start, t_end = static$t_end, dt = 1),
      N_pop = ts$Npop,
      nruns = n
    )
    fg %>% 
      furrr::future_map_dfr(function(mat) {
        mat %>% 
          as.data.frame() %>% 
          rename(t = V1, mean = V2, sd = V3) %>% 
          mutate(
            farm_ID = as.integer(static$farm_ID),
            t = as.integer(t)
          )
        }, 
        .id = "measure",
        .options = furrr_options(seed = T)
      ) %>% 
      mutate(measure = as.factor(measure))
  },
  .options = furrr_options(seed = T),
  .progress = T
  )
}
if (!file.exists(file.path(output_sens_data_path, "res_nruns_5000.qs")) | overwrite == T) {
  res_5000 <- farm_run_MC(5000)
  res_5000 <- res_5000 %>% mutate(nruns = as.factor(5000))
  qsave(res_5000, file.path(output_sens_data_path, "res_nruns_5000.qs"))
}

res_5000 <- qread(file.path(output_sens_data_path, "res_nruns_5000.qs"))

3 Difference between uniform run and 5000 nruns

First of all, how does the MC run approach using 5000 runs (the default) differ from only using a uniform fish weight/maximum ingestion?

if (!file.exists(file.path(output_sens_data_path, "res_nruns_1.qs")) | overwrite == T) {
  res_single <- farm_run_singular() %>% 
    mutate(nruns = as.factor(1), sd = 0) %>% 
    relocate(sd, .after = mean)

  qsave(res_single, file.path(output_sens_data_path, "res_nruns_1.qs"))
}

res_single <- qread(file.path(output_sens_data_path, "res_nruns_1.qs"))
weight_single <- res_single %>% 
  filter(measure == "weight_stat")
weight_5000 <- res_5000 %>% 
  filter(measure == "weight_stat")

ggplot(
  rbind(weight_single, weight_5000),
  aes(x = t, y = mean, ymin = mean-sd, ymax = mean+sd, colour = nruns, fill = nruns)
) +
  geom_line() +
  geom_ribbon(alpha = 0.25) +
  facet_wrap(facets = vars(farm_ID), nrow = 5) +
  theme_classic()
Figure 1: Individual weight at each timestep, when the farm has been run with 5000 Monte-Carlo runs (with a range of fish weights/maximum ingestion values) and when only mean fish weight/maximum ingestion was used.
df_diff_1 <- rbind(weight_single, weight_5000) %>% 
  select(-sd) %>% 
  pivot_wider(
    names_from = "nruns", names_prefix = "nruns_",
    values_from = mean
  ) %>% 
    mutate(nruns_diff = (nruns_1 - nruns_5000)/nruns_5000)

ggplot(df_diff_1, aes(x = t, y = 100*nruns_diff)) +
  geom_line() +
  facet_wrap(facets = vars(farm_ID), nrow = 5) +
  theme_classic()
Figure 2

4 Difference between 5000 nruns and others

for (i in c(100, 250, 500, 1000, 2500)) {
  fnm <- file.path(output_sens_data_path, paste0("res_nruns_", i, ".qs"))
  print(fnm)
  if (!file.exists(fnm) | overwrite == T) {
    res <- farm_run_MC(i) %>% mutate(nruns = as.factor(i))
    qsave(res, fnm)
  }
}
[1] "C:/Users/treimer/Documents/R-temp-files/local_to_global_mariculture_modelling/outputs/sensitivity_data/res_nruns_100.qs"
[1] "C:/Users/treimer/Documents/R-temp-files/local_to_global_mariculture_modelling/outputs/sensitivity_data/res_nruns_250.qs"
[1] "C:/Users/treimer/Documents/R-temp-files/local_to_global_mariculture_modelling/outputs/sensitivity_data/res_nruns_500.qs"
[1] "C:/Users/treimer/Documents/R-temp-files/local_to_global_mariculture_modelling/outputs/sensitivity_data/res_nruns_1000.qs"
[1] "C:/Users/treimer/Documents/R-temp-files/local_to_global_mariculture_modelling/outputs/sensitivity_data/res_nruns_2500.qs"
res_100 <- qread(file.path(output_sens_data_path, "res_nruns_100.qs"))
res_250 <- qread(file.path(output_sens_data_path, "res_nruns_250.qs"))
res_500 <- qread(file.path(output_sens_data_path, "res_nruns_500.qs"))
res_1000 <- qread(file.path(output_sens_data_path, "res_nruns_1000.qs"))
res_2500 <- qread(file.path(output_sens_data_path, "res_nruns_2500.qs"))

4.1 Difference between 100 and 5000 nruns

weight_100 <- res_100 %>% 
  filter(measure == "weight_stat")
weight_5000 <- res_5000 %>% 
  filter(measure == "weight_stat")

ggplot(
  rbind(weight_100, weight_5000),
  aes(x = t, y = mean, ymin = mean-sd, ymax = mean+sd, colour = nruns, fill = nruns)
) +
  geom_line() +
  geom_ribbon(alpha = 0.25) +
  facet_wrap(facets = vars(farm_ID), nrow = 5) +
  theme_classic()
Figure 3: Individual weight at each timestep, when the farm has been run with 5000 Monte-Carlo runs vs 100 Monte-Carlo runs.
df_diff_100 <- rbind(weight_100, weight_5000) %>% 
  select(-sd) %>% 
  pivot_wider(
    names_from = "nruns", names_prefix = "nruns_",
    values_from = mean
  ) %>% 
    mutate(nruns_diff = (nruns_100 - nruns_5000)/nruns_5000)

ggplot(df_diff_100, aes(x = t, y = 100*nruns_diff)) +
  geom_line() +
  facet_wrap(facets = vars(farm_ID), nrow = 5) +
  theme_classic()
Figure 4: Difference in individual weight at each timestep when the farm has been run with 5000 vs 100 Monte-Carlo runs.

4.2 Difference between 250 and 5000 nruns

weight_250 <- res_250 %>% 
  filter(measure == "weight_stat")
weight_5000 <- res_5000 %>% 
  filter(measure == "weight_stat")

ggplot(
  rbind(weight_250, weight_5000),
  aes(x = t, y = mean, ymin = mean-sd, ymax = mean+sd, colour = nruns, fill = nruns)
) +
  geom_line() +
  geom_ribbon(alpha = 0.25) +
  facet_wrap(facets = vars(farm_ID), nrow = 5) +
  theme_classic()
Figure 5: Individual weight at each timestep, when the farm has been run with 5000 Monte-Carlo runs vs 250 Monte-Carlo runs.
df_diff_250 <- rbind(weight_250, weight_5000) %>% 
  select(-sd) %>% 
  pivot_wider(
    names_from = "nruns", names_prefix = "nruns_",
    values_from = mean
  ) %>% 
    mutate(nruns_diff = (nruns_250 - nruns_5000)/nruns_5000)

ggplot(df_diff_250, aes(x = t, y = 100*nruns_diff)) +
  geom_line() +
  facet_wrap(facets = vars(farm_ID), nrow = 5) +
  theme_classic()
Figure 6: Difference in individual weight at each timestep when the farm has been run with 5000 vs 250 Monte-Carlo runs.

4.3 Difference between 500 and 5000 nruns

weight_500 <- res_500 %>% 
  filter(measure == "weight_stat")
weight_5000 <- res_5000 %>% 
  filter(measure == "weight_stat")

ggplot(
  rbind(weight_500, weight_5000),
  aes(x = t, y = mean, ymin = mean-sd, ymax = mean+sd, colour = nruns, fill = nruns)
) +
  geom_line() +
  geom_ribbon(alpha = 0.25) +
  facet_wrap(facets = vars(farm_ID), nrow = 5) +
  theme_classic()
Figure 7: Individual weight at each timestep, when the farm has been run with 5000 Monte-Carlo runs vs 500 Monte-Carlo runs.
df_diff_500 <- rbind(weight_500, weight_5000) %>% 
  select(-sd) %>% 
  pivot_wider(
    names_from = "nruns", names_prefix = "nruns_",
    values_from = mean
  ) %>% 
    mutate(nruns_diff = (nruns_500 - nruns_5000)/nruns_5000)

ggplot(df_diff_500, aes(x = t, y = 100*nruns_diff)) +
  geom_line() +
  facet_wrap(facets = vars(farm_ID), nrow = 5) +
  theme_classic()
Figure 8: Difference in individual weight at each timestep when the farm has been run with 5000 vs 500 Monte-Carlo runs.

4.4 Difference between 1000 and 5000 nruns

weight_1000 <- res_1000 %>% 
  filter(measure == "weight_stat")
weight_5000 <- res_5000 %>% 
  filter(measure == "weight_stat")

ggplot(
  rbind(weight_1000, weight_5000),
  aes(x = t, y = mean, ymin = mean-sd, ymax = mean+sd, colour = nruns, fill = nruns)
) +
  geom_line() +
  geom_ribbon(alpha = 0.25) +
  facet_wrap(facets = vars(farm_ID), nrow = 5) +
  theme_classic()
Figure 9: Individual weight at each timestep, when the farm has been run with 5000 Monte-Carlo runs vs 1000 Monte-Carlo runs.
df_diff_1000 <- rbind(weight_1000, weight_5000) %>% 
  select(-sd) %>% 
  pivot_wider(
    names_from = "nruns", names_prefix = "nruns_",
    values_from = mean
  ) %>% 
    mutate(nruns_diff = (nruns_1000 - nruns_5000)/nruns_5000)

ggplot(df_diff_1000, aes(x = t, y = 100*nruns_diff)) +
  geom_line() +
  facet_wrap(facets = vars(farm_ID), nrow = 5) +
  theme_classic()
Figure 10: Difference in individual weight at each timestep when the farm has been run with 5000 vs 1000 Monte-Carlo runs.

4.5 Difference between 2500 and 5000 nruns

weight_2500 <- res_2500 %>% 
  filter(measure == "weight_stat")
weight_5000 <- res_5000 %>% 
  filter(measure == "weight_stat")

ggplot(
  rbind(weight_2500, weight_5000),
  aes(x = t, y = mean, ymin = mean-sd, ymax = mean+sd, colour = nruns, fill = nruns)
) +
  geom_line() +
  geom_ribbon(alpha = 0.25) +
  facet_wrap(facets = vars(farm_ID), nrow = 5) +
  theme_classic()
Figure 11: Individual weight at each timestep, when the farm has been run with 5000 Monte-Carlo runs vs 2500 Monte-Carlo runs.
df_diff_2500 <- rbind(weight_2500, weight_5000) %>% 
  select(-sd) %>% 
  pivot_wider(
    names_from = "nruns", names_prefix = "nruns_",
    values_from = mean
  ) %>% 
    mutate(nruns_diff = (nruns_2500 - nruns_5000)/nruns_5000)

ggplot(df_diff_2500, aes(x = t, y = 100*nruns_diff)) +
  geom_line() +
  facet_wrap(facets = vars(farm_ID), nrow = 5) +
  theme_classic()
Figure 12: Difference in individual weight at each timestep when the farm has been run with 5000 vs 2500 Monte-Carlo runs.

5 Summary

df_diff <- data.frame(
  nruns = c(1, 100, 250, 500, 1000, 2500),
  max_diff = c(
    max(abs(df_diff_1$nruns_diff)),
    max(abs(df_diff_100$nruns_diff)),
    max(abs(df_diff_250$nruns_diff)),
    max(abs(df_diff_500$nruns_diff)),
    max(abs(df_diff_1000$nruns_diff)),
    max(abs(df_diff_2500$nruns_diff))
  )
)

ggplot(df_diff, aes(x = as.factor(nruns), y = 100*max_diff)) +
  geom_col() + theme_classic()